tutorial - NEON pheno packages

https://www.neonscience.org/resources/learning-hub/tutorials/phenocam-phenor-modeling

For the latest version, install phenor from here: https://github.com/bluegreen-labs/phenor (be sure to update all packages for installation to work, and install GenSA)

# example from website: Bartlett NH 
# phenocamr::download_phenocam(
#   frequency = 3,
#   veg_type = "DB", #DB for decid broadlead, EN for evergreen, default is ALL 
#   roi_id = 1000,
#   site = "bartlettir",
#   phenophase = TRUE,
#   out_dir = "." # saves in current working dir
#   )

# evergreens in del norte county (close to north humboldt)
# phenocamr::download_phenocam(
#   frequency = 3,
#   veg_type = "EN", #DB for decid broadlead, EN for evergreen, default is ALL 
#   roi_id = 1000,
#   site = "delnortecounty1",
#   phenophase = TRUE,
#   out_dir = "." # saves in current working dir
#   )

# shrubs in crownpoint NM, NE of Gallup at Navajo Technical University 
#phenocamr::download_phenocam(
#  frequency = 3,
#  roi_id = 1000,
#  site = "ntupinyongarden",
#  phenophase = TRUE,
#  out_dir = "." # saves in current working dir
#  )

# two files: time series and transition data 

# time series - 3 days 
dnc <- read_csv("delnortecounty1_EN_1000_3day.csv", skip = 24)
## Rows: 1942 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): midday_filename
## dbl  (46): year, doy, image_count, midday_r, midday_g, midday_b, midday_gcc,...
## lgl   (1): snow_flag
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ntu <- read_csv("ntupinyongarden_SH_1000_3day.csv", skip=24)
## Rows: 292 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): midday_filename
## dbl  (46): year, doy, image_count, midday_r, midday_g, midday_b, midday_gcc,...
## lgl   (1): snow_flag
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# transition data - estimate phenophases for spring and autumn, or when the change in rising or falling Gcc happens 
dnc_t <- read_csv("delnortecounty1_EN_1000_3day_transition_dates.csv", skip=16)
## Rows: 2 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): site, veg_type, direction, gcc_value
## dbl  (6): roi_id, threshold_10, threshold_25, threshold_50, min_gcc, max_gcc
## date (9): transition_10, transition_25, transition_50, transition_10_lower_c...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ntu_t <- read_csv("ntupinyongarden_SH_1000_3day_transition_dates.csv", skip=16) # nothing is in this one 
## Rows: 0 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): site, veg_type, roi_id, direction, gcc_value, transition_10, trans...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

explore data

Find the metadata here: https://phenocam.nau.edu/webcam/tools/summary_file_format/

summary(dnc)
##       date                 year           doy          image_count   
##  Min.   :2019-12-12   Min.   :2019   Min.   :  1.00   Min.   : 0.00  
##  1st Qu.:2021-04-10   1st Qu.:2021   1st Qu.: 81.25   1st Qu.:18.00  
##  Median :2022-08-08   Median :2022   Median :176.00   Median :32.00  
##  Mean   :2022-08-08   Mean   :2022   Mean   :178.36   Mean   :31.85  
##  3rd Qu.:2023-12-06   3rd Qu.:2023   3rd Qu.:273.00   3rd Qu.:45.00  
##  Max.   :2025-04-05   Max.   :2025   Max.   :366.00   Max.   :78.00  
##                                                       NA's   :1353   
##  midday_filename       midday_r        midday_g        midday_b    
##  Length:1942        Min.   :21.63   Min.   :35.03   Min.   :22.01  
##  Class :character   1st Qu.:37.18   1st Qu.:39.96   1st Qu.:29.22  
##  Mode  :character   Median :41.46   Median :45.07   Median :32.43  
##                     Mean   :44.51   Mean   :46.82   Mean   :33.32  
##                     3rd Qu.:51.10   3rd Qu.:52.21   3rd Qu.:36.69  
##                     Max.   :68.91   Max.   :68.87   Max.   :63.10  
##                     NA's   :1399    NA's   :1399    NA's   :1399   
##    midday_gcc       midday_rcc         r_mean          r_std       
##  Min.   :0.3428   Min.   :0.1923   Min.   :23.95   Min.   : 0.000  
##  1st Qu.:0.3679   1st Qu.:0.3413   1st Qu.:39.71   1st Qu.: 4.890  
##  Median :0.3757   Median :0.3609   Median :43.98   Median : 6.853  
##  Mean   :0.3753   Mean   :0.3551   Mean   :44.53   Mean   : 6.872  
##  3rd Qu.:0.3827   3rd Qu.:0.3754   3rd Qu.:48.86   3rd Qu.: 8.967  
##  Max.   :0.4069   Max.   :0.4184   Max.   :59.19   Max.   :16.595  
##  NA's   :1399     NA's   :1399     NA's   :1399    NA's   :1399    
##      g_mean          g_std            b_mean          b_std       
##  Min.   :36.67   Min.   : 0.000   Min.   :19.99   Min.   : 0.000  
##  1st Qu.:43.69   1st Qu.: 4.476   1st Qu.:33.89   1st Qu.: 3.703  
##  Median :46.93   Median : 6.125   Median :35.68   Median : 4.691  
##  Mean   :47.35   Mean   : 6.111   Mean   :35.66   Mean   : 4.713  
##  3rd Qu.:50.97   3rd Qu.: 7.715   3rd Qu.:37.50   3rd Qu.: 5.632  
##  Max.   :60.51   Max.   :18.976   Max.   :63.10   Max.   :19.237  
##  NA's   :1399    NA's   :1399     NA's   :1399    NA's   :1399    
##     gcc_mean         gcc_std           gcc_50           gcc_75      
##  Min.   :0.3538   Min.   :0.0000   Min.   :0.3523   Min.   :0.3538  
##  1st Qu.:0.3678   1st Qu.:0.0055   1st Qu.:0.3682   1st Qu.:0.3722  
##  Median :0.3715   Median :0.0067   Median :0.3718   Median :0.3762  
##  Mean   :0.3709   Mean   :0.0072   Mean   :0.3713   Mean   :0.3758  
##  3rd Qu.:0.3749   3rd Qu.:0.0088   3rd Qu.:0.3755   3rd Qu.:0.3808  
##  Max.   :0.3861   Max.   :0.0203   Max.   :0.3865   Max.   :0.3935  
##  NA's   :1399     NA's   :1399     NA's   :1399     NA's   :1399    
##      gcc_90          rcc_mean         rcc_std           rcc_50      
##  Min.   :0.3538   Min.   :0.2248   Min.   :0.0000   Min.   :0.2248  
##  1st Qu.:0.3753   1st Qu.:0.3361   1st Qu.:0.0158   1st Qu.:0.3426  
##  Median :0.3792   Median :0.3494   Median :0.0221   Median :0.3551  
##  Mean   :0.3795   Mean   :0.3473   Mean   :0.0233   Mean   :0.3532  
##  3rd Qu.:0.3845   3rd Qu.:0.3607   3rd Qu.:0.0292   3rd Qu.:0.3673  
##  Max.   :0.4022   Max.   :0.4493   Max.   :0.0606   Max.   :0.4833  
##  NA's   :1399     NA's   :1399     NA's   :1399     NA's   :1399    
##      rcc_75           rcc_90       max_solar_elev  snow_flag     
##  Min.   :0.2248   Min.   :0.2248   Min.   :11.10   Mode:logical  
##  1st Qu.:0.3535   1st Qu.:0.3578   1st Qu.:32.78   NA's:1942     
##  Median :0.3652   Median :0.3706   Median :50.50                 
##  Mean   :0.3623   Mean   :0.3677   Mean   :49.16                 
##  3rd Qu.:0.3753   3rd Qu.:0.3809   3rd Qu.:65.73                 
##  Max.   :0.4933   Max.   :0.5001   Max.   :71.62                 
##  NA's   :1399     NA's   :1399     NA's   :1399                  
##  outlierflag_gcc_mean outlierflag_gcc_50 outlierflag_gcc_75 outlierflag_gcc_90
##  Min.   :0.000000     Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.000000     1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.000000     Median :0.000000   Median :0.000000   Median :0.000000  
##  Mean   :0.001041     Mean   :0.002601   Mean   :0.004162   Mean   :0.004683  
##  3rd Qu.:0.000000     3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.000000     Max.   :1.000000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :20           NA's   :20         NA's   :20         NA's   :20        
##  smooth_gcc_mean  smooth_gcc_50    smooth_gcc_75    smooth_gcc_90   
##  Min.   :0.3557   Min.   :0.3534   Min.   :0.3503   Min.   :0.3546  
##  1st Qu.:0.3672   1st Qu.:0.3675   1st Qu.:0.3716   1st Qu.:0.3748  
##  Median :0.3704   Median :0.3710   Median :0.3751   Median :0.3780  
##  Mean   :0.3703   Mean   :0.3709   Mean   :0.3754   Mean   :0.3794  
##  3rd Qu.:0.3742   3rd Qu.:0.3747   3rd Qu.:0.3797   3rd Qu.:0.3839  
##  Max.   :0.3820   Max.   :0.3829   Max.   :0.3874   Max.   :0.3953  
##                                                                     
##  smooth_rcc_mean  smooth_rcc_50    smooth_rcc_75    smooth_rcc_90   
##  Min.   :0.3097   Min.   :0.3071   Min.   :0.3062   Min.   :0.3209  
##  1st Qu.:0.3404   1st Qu.:0.3451   1st Qu.:0.3553   1st Qu.:0.3597  
##  Median :0.3483   Median :0.3532   Median :0.3625   Median :0.3684  
##  Mean   :0.3474   Mean   :0.3530   Mean   :0.3622   Mean   :0.3679  
##  3rd Qu.:0.3544   3rd Qu.:0.3606   3rd Qu.:0.3716   3rd Qu.:0.3778  
##  Max.   :0.3925   Max.   :0.4169   Max.   :0.4210   Max.   :0.4204  
##                                                                     
##  smooth_ci_gcc_mean smooth_ci_gcc_50   smooth_ci_gcc_75   smooth_ci_gcc_90  
##  Min.   :0.001890   Min.   :0.002210   Min.   :0.002070   Min.   :0.002180  
##  1st Qu.:0.001950   1st Qu.:0.002290   1st Qu.:0.002160   1st Qu.:0.002270  
##  Median :0.001990   Median :0.002350   Median :0.002210   Median :0.002320  
##  Mean   :0.004677   Mean   :0.004991   Mean   :0.004875   Mean   :0.005106  
##  3rd Qu.:0.002100   3rd Qu.:0.002470   3rd Qu.:0.002370   3rd Qu.:0.002500  
##  Max.   :0.020000   Max.   :0.020000   Max.   :0.020000   Max.   :0.020000  
##                                                                             
##  smooth_ci_rcc_mean smooth_ci_rcc_50   smooth_ci_rcc_75   smooth_ci_rcc_90  
##  Min.   :0.007170   Min.   :0.008310   Min.   :0.007410   Min.   :0.007380  
##  1st Qu.:0.007430   1st Qu.:0.008570   1st Qu.:0.007640   1st Qu.:0.007610  
##  Median :0.007630   Median :0.008710   Median :0.007770   Median :0.007740  
##  Mean   :0.009641   Mean   :0.010662   Mean   :0.009827   Mean   :0.009802  
##  3rd Qu.:0.007960   3rd Qu.:0.009175   3rd Qu.:0.008178   3rd Qu.:0.008148  
##  Max.   :0.020000   Max.   :0.020000   Max.   :0.020000   Max.   :0.020000  
##                                                                             
##     int_flag   
##  Min.   :1     
##  1st Qu.:1     
##  Median :1     
##  Mean   :1     
##  3rd Qu.:1     
##  Max.   :1     
##  NA's   :1643
names(dnc)
##  [1] "date"                 "year"                 "doy"                 
##  [4] "image_count"          "midday_filename"      "midday_r"            
##  [7] "midday_g"             "midday_b"             "midday_gcc"          
## [10] "midday_rcc"           "r_mean"               "r_std"               
## [13] "g_mean"               "g_std"                "b_mean"              
## [16] "b_std"                "gcc_mean"             "gcc_std"             
## [19] "gcc_50"               "gcc_75"               "gcc_90"              
## [22] "rcc_mean"             "rcc_std"              "rcc_50"              
## [25] "rcc_75"               "rcc_90"               "max_solar_elev"      
## [28] "snow_flag"            "outlierflag_gcc_mean" "outlierflag_gcc_50"  
## [31] "outlierflag_gcc_75"   "outlierflag_gcc_90"   "smooth_gcc_mean"     
## [34] "smooth_gcc_50"        "smooth_gcc_75"        "smooth_gcc_90"       
## [37] "smooth_rcc_mean"      "smooth_rcc_50"        "smooth_rcc_75"       
## [40] "smooth_rcc_90"        "smooth_ci_gcc_mean"   "smooth_ci_gcc_50"    
## [43] "smooth_ci_gcc_75"     "smooth_ci_gcc_90"     "smooth_ci_rcc_mean"  
## [46] "smooth_ci_rcc_50"     "smooth_ci_rcc_75"     "smooth_ci_rcc_90"    
## [49] "int_flag"
mean(dnc$gcc_90, na.rm=T)
## [1] 0.3795198
mean(ntu$gcc_90, na.rm=T)
## [1] 0.3490447

make a plot

# evergreens
ggplot(dnc, aes(x=date, y=gcc_90)) + 
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 1399 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1399 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(dnc, aes(x=date, y=smooth_gcc_90)) +
  geom_line() 

dnc %>% dplyr::filter(year<2025) %>% 
  ggplot(aes(x=doy, y=smooth_gcc_90, col=as.factor(year))) + 
  geom_line()

# shrubs 
ggplot(ntu, aes(x=date, y=smooth_gcc_90)) +
  geom_line() 

# only available for year 2019 and error after like august 

dnc %>% dplyr::filter(year==2019) %>% 
ggplot() +
  geom_line(aes(x=date, y=smooth_gcc_90, col="Del Norte")) + 
  geom_line(data=ntu, aes(x=date, y=smooth_gcc_90, col="NTU")) 

# years do not overlap

dnc %>% group_by(doy) %>%
  summarize(gcc90 = mean(smooth_gcc_90)) %>% 
ggplot() +
  geom_line(aes(x=doy, y=gcc90, col="Del Norte")) + 
  geom_line(data=ntu, aes(x=doy, y=smooth_gcc_90, col="NTU")) 

# need to find another location for Gallup 

transition days

td <- dnc_t %>% 
  dplyr::filter(direction=="rising")

# create a simple line graph of the smooth Green Chromatic Coordinate (Gcc)
# and add points for transition dates
ggplot(dnc, aes(x=as.Date(date), y=smooth_gcc_90)) + 
  geom_line() + 
  geom_point(data=td, aes(x=as.Date(transition_25),
                          y=threshold_25),
             col="red")

# transition date is only available for the year 2023? Plotting when 25% of greenness amplitude (of 90th percentile) is reached 
dnc %>% dplyr::filter(year==2023) %>% 
ggplot(aes(x=as.Date(date), y=smooth_gcc_90)) + 
  geom_line() + 
  geom_point(data=td, aes(x=as.Date(transition_25),
                          y=threshold_25),
             col="red")

climate data: CMIP5

Using the example from the website, New Hampshire Deciduous Broadleaf, to see how it goes

bnh <- read_csv("bartlettir_DB_1000_3day.csv", skip = 24)
## New names:
## Rows: 1056 Columns: 32
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): bartlettir_2008_04_22_120541.jpg dbl (25): 2008, 113, 69, 101.00494,
## 103.06487, 81.16984, 0.36133, 0.35411, ... lgl (5): NA...28, NA...29, NA...30,
## NA...31, NA...32 date (1): 2008-04-22
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `NA` -> `NA...28`
## • `NA` -> `NA...29`
## • `NA` -> `NA...30`
## • `NA` -> `NA...31`
## • `NA` -> `NA...32`
bnh_t <- read_csv("bartlettir_DB_1000_3day_transition_dates.csv", skip = 16)
## Rows: 72 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): site, veg_type, direction, gcc_value
## dbl  (6): roi_id, threshold_10, threshold_25, threshold_50, min_gcc, max_gcc
## date (9): transition_10, transition_25, transition_50, transition_10_lower_c...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# download source cmip5 data into your temporary directory
# please note this is a large download: >4GB! 
#phenor::download_cmip5(
#  year = 2090,
#  path = tempdir(),
#  model = "MIROC5",
#  scenario = "rcp85"
#  )
# note this is an old function, not available anymore 

use vignette

https://bluegreen-labs.github.io/phenocamr/articles/phenocamr-vignette.html

source has clear steps, but uses base plotting

# read as phenocamr type 
df = read_phenocam("bartlettir_DB_1000_3day.csv")

df <- expand_phenocam(df) # expand every 3 day to every day 

df <- detect_outliers(df) # detect outliers

df <- smooth_ts(df) # smooth data using AIC 

phenology_dates <- phenophases(df, internal = TRUE) # calculate rising and falling

as.data.frame(df$data) %>% 
  mutate(date = as.Date(date)) %>% 
  ggplot(aes(x=date, y=smooth_gcc_90)) + 
  geom_line() +
  labs(x="Date", y="GCC") + 
  # rising "spring" greenup dates 
  geom_vline(data=phenology_dates$rising, aes(xintercept=transition_50), col="green") + 
  # falling 'autumn' senescence dates 
  geom_vline(data=phenology_dates$falling, aes(xintercept=transition_50), col="brown")

climate data: Daymet

Use the function merge_daymet

Daymet is continuous, gridded estimates of weather and climate variables creted through statistical interpolation of ground based measurements. Read more here: https://daymet.ornl.gov/

bnh_clim = merge_daymet("bartlettir_DB_1000_3day.csv")

names(bnh_clim)
##  [1] "site"           "veg_type"       "roi_id"         "frequency"     
##  [5] "lat"            "lon"            "elev"           "solar_elev_min"
##  [9] "header"         "data"
bnh_data = bnh_clim$data

# plot daily rainfall and temperatures 
ggplot(bnh_data, aes(x=date, y=prcp..mm.day.))+
  geom_line() + 
  labs(y="precip [mm]", title="Daily Precipitation") +
  theme_bw() 

ggplot(bnh_data) +
  geom_point(aes(x=date, y=tmax..deg.c., col="tmax"), col="red") +
  geom_point(aes(x=date, y=tmin..deg.c., col="tmin"), col="blue") +
  labs(y="temp. deg C", title = "Daily temperature") +
  theme_bw()

Explore data with climate

# day of the year patterns 
ggplot(bnh_data) +
  geom_point(aes(x=doy, y=gcc_90))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

# precipitation
ggplot(bnh_data) + 
  geom_point(aes(x=prcp..mm.day., y=gcc_90, col=doy)) + scale_color_viridis_c()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

# day of the year with daylength 
ggplot(bnh_data) + 
   geom_point(aes(x=doy, y=gcc_90, col=dayl..s.)) + scale_color_viridis_c()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

# daylength 
ggplot(bnh_data) +
  geom_point(aes(x=doy, y=gcc_90, col=dayl..s.)) + scale_color_viridis_c()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

# min temperatures 
ggplot(bnh_data) +
  geom_point(aes(x=tmin..deg.c., y=gcc_90))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

# max temperatures 
ggplot(bnh_data) +
  geom_point(aes(x=tmax..deg.c., y=gcc_90))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

start with linear model

# subset data 
data_mod = bnh_data %>% dplyr::filter(year<2014)

# start with one variable 
lm_test = lm(data=data_mod, gcc_90~dayl..s.)
# see residuals, p-vals, Rsquares, etc. 
summary(lm_test)
## 
## Call:
## lm(formula = gcc_90 ~ dayl..s., data = data_mod)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.072102 -0.011738  0.008489  0.015394  0.053883 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.042e-01  4.963e-03   41.15   <2e-16 ***
## dayl..s.    4.627e-06  1.123e-07   41.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02429 on 672 degrees of freedom
##   (22 observations deleted due to missingness)
## Multiple R-squared:  0.7165, Adjusted R-squared:  0.7161 
## F-statistic:  1699 on 1 and 672 DF,  p-value: < 2.2e-16
# check with more variables 
lm_test2 = lm(data=data_mod, gcc_90~dayl..s.+tmin..deg.c.)

# predict with remaining years
data_pred = bnh_data %>% 
  dplyr::filter(year>=2014) %>% 
  select(gcc_90, dayl..s., tmin..deg.c., year, doy, date) 

# predict with one var
data_pred$pred1_gcc_90 = predict.lm(lm_test, data_pred)

# predict with 2 vars 
data_pred$pred2_gcc_90 = predict.lm(lm_test2, data_pred)

ggplot(data_pred) + 
  geom_point(aes(x=date, y=pred1_gcc_90, col="1 var")) + 
  geom_point(aes(x=date, y=pred2_gcc_90, col="2 vars"))+ 
  geom_point(aes(x=date, y=gcc_90, col="obs")) 
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

use models from phenoR package

# read in parameter ranges 
path <- sprintf("%s/extdata/parameter_ranges.csv",path.package("phenor"))
par_ranges <- read.table(path,
                        header = TRUE,
                        sep = ",")

lin_par = par_ranges %>%
  dplyr::filter(model=="LIN") %>% 
  select(a,b)

# load data from phenor package
bnh = phenocam_DB$bartlettir # in proper format for models 

# data must be formated for input 
LIN(par=c(a=1000, b=1000), data=bnh)
## [1] 5410.326 6616.848 9092.391 6603.261 8801.630 6894.022 4652.174 5904.891